#!/usr/bin/env Rscript

#          CEM_ATET_Code--Widower_Effects_in_Voter_Participation
#          =====================================================

# Date: 2012-02-18 16:24:48 PST


# Week/Ages
# ~~~~~~~~~

# Preamble
# ========
         library(cem)
           
  # |--------------------------+------------+------------+--------------+----------|
  # | Options:                 |            |            |              |          |
  # |--------------------------+------------+------------+--------------+----------|
  # | Impute gender            | IMPUTE     |            |              |          |
  # | Expunge by Cal policy    | FORCECULL  |            |              |          |
  # | Opposite sex only        | OPPSEX     |            |              |          |
  # | Gender specific          | MALE       | FEMALE     |              |          |
  # | Age                      | UNDER65    | OVER65     | 65TO79       | 80OROVER |
  # | Voting                   | SAME       | DIFFERENT  | MORE         | FEWER    |
  # | Voting                   | NOABSTAIN  | ABSTAIN    |              |          |
  # | Voting                   | ONEABSTAIN | TWOABSTAIN | THREEABSTAIN |          |
  # | Occupancy                | OCC2       | OCC3PLUS   |              |          |
  # | By age (instead of week) | AGE        |            |              |          |
  # | Add zip code data        | ZIP        |            |              |          |
  # | Pop density >= / < 1000  | RURAL      | URBAN      |              |          |
  # | Per capita inc 35000     | RICH       | POOR       |              |          |
  # | Gender comparison        | GENCOMP    | FEMALECTRL |              |          |
  # |--------------------------+------------+------------+--------------+----------|
           
         option <- list(c("SAME"))
         option.name <- c("Same Voting History")

# File
# ====

# Extract File
# ------------
   setwd('/home/whobbs/data/cavr')
   
   # find column numbers for AWK command
   header<-1:89
   names(header)<-c("pid_e","lastname_e","zip_e","state_e","localitycode_e","precinct_e","precinctpart_e","regdate_e","dob_e","dod_e","ageatdeath_e","gender_e","party_e","gg10_e","age_gg10_e","dfdgg10_e","gp10_e","age_gp10_e","dfdgp10_e","ss9_e","age_ss9_e","dfdss9_e","pg8_e","age_pg8_e","dfdpg8_e","dp8_e","pp8_e","gg6_e","gp6_e","ss5_e","pg4_e","pp4_e","male_e","female_e","dem_e","rep_e","death_e","hid","hid_occnum","pid_a","lastname_a","zip_a","state_a","localitycode_a","precinct_a","precinctpart_a","regdate_a","dob_a","dod_a","ageatdeath_a","gender_a","party_a","gg10_a","age_gg10_a","dfdgg10_a","gp10_a","age_gp10_a","dfdgp10_a","ss9_a","age_ss9_a","dfdss9_a","pg8_a","age_pg8_a","dfdpg8_a","dp8_a","pp8_a","gg6_a","gp6_a","ss5_a","pg4_a","pp4_a","male_a","female_a","dem_a","rep_a","death_a","ea_agediscrep","pop_density","median_income","avg_income","percapita_income","percent_foreign_naturalized", "hid_e_2010", "mover_e", "hid_a_2010", "mover_a", "registration", "birthplace", "language")
   header<-as.list(header)
   
   my.vars <- with(header, paste(age_gg10_a, age_gg10_e, ea_agediscrep, female_a, female_e, male_a, male_e, hid_occnum, ageatdeath_a, death_a, dfdgg10_a, dem_a, dem_e, rep_a, rep_e, gg10_a, pg8_a, gg6_a, gp6_a, ss5_a, pg4_a, gg10_e, gp10_e, ss9_e, pg8_e, pp8_e, gg6_e, gp6_e, ss5_e, pg4_e, mover_e, sep = ", $"))
   
   if ("ZIP" %in% unlist(option)){ # needed for Rural/Urban analysis
  my.vars <- paste(my.vars, header$pop_density, header$percapita_income, sep = ", $")
  }
  
   my.vars <- gsub("^", "$", my.vars)
   
   cmd<-paste("awk -F, '{print ", my.vars, "}' OFS=, cavr_analysisfile.csv > temp_analysis_file.csv", sep = "")
   
   if ("GENCOMP" %in% unlist(option)) {
   cmd <- paste("awk -F, 'NR==1||$", header$death_a, "==1{print ", my.vars, "}' OFS=, cavr_analysisfile.csv > temp_analysis_file.csv", sep = "")  
   }
  
   # extract only the necessary columns
   system(cmd, wait = TRUE)

# Read File
# ---------
  system('head -n10 temp_analysis_file.csv > temp_analysis_file_head.csv', wait = TRUE)
  for.classes <- read.csv('temp_analysis_file_head.csv', head = TRUE, stringsAsFactors = FALSE)
  names(for.classes) <- gsub("_", ".", names(for.classes)) # convert column names to R format
  casecontrol.names <- names(for.classes) # save names
  casecontrol.classes <- lapply(for.classes, class) # save column classes for faster read.csv
  
  casecontrol <- read.csv('temp_analysis_file.csv', head = TRUE, colClasses = casecontrol.classes)
  names(casecontrol) <- casecontrol.names # reassign names
  
  system("rm temp_analysis_file.csv", wait = FALSE)
  system("rm temp_analysis_file_head.csv", wait = FALSE)

# Prep
# ====

# R format
# --------
     # remove egos missing from the 2010 voter record, and missing live alters
     
  casecontrol<-casecontrol[complete.cases(casecontrol$gg10.e)&complete.cases(casecontrol$gg10.a),] # gg10.a is 989898 for deceased alters present in 2010 voter record and 999999 for deceased alters absent; gg10.a is NA for non-deceased alters absent
  
     # change 989898 values to NA
  casecontrol[casecontrol$gg10.a==989898,]$gg10.a <- NA
     # change 999999 values to NA
  ## casecontrol <- casecontrol[casecontrol$pg4.e == 1 & casecontrol$pg4.a == 1,]
  
     if (!("GENCOMP" %in% unlist(option))) {
     casecontrol[casecontrol$ageatdeath.a==999999,]$ageatdeath.a <- NA
     casecontrol[casecontrol$dfdgg10.a==999999,]$dfdgg10.a <- NA
   }
     

# CEM prep
# --------
    agediscrep.cuts <- c(-15.5, -5.5, -1.5, 1.5, 5.5, 15.5)
    age.cuts <- c(17.5,24.5,34.5,44.5,49.5,54.5,59.5,64.5,69.5,74.5,79.5,84.5,89.5,94.5,115.5)
    hid.cuts <- c(.5,1.5,2.5,6.5)
    week.cuts <- c(14.5, 25.5, 51.5, 80.5)
    
    # weeks to Gubernatorial General 2010
    week.begin <- seq(-108,557,7)
    week.end <- seq(-102,563,7)
    week <- (week.begin + 3) / 7
  
    setwd('/home/whobbs/scripts/widower_effects_analysis')
    
  # write original case script before i loop
    # system("sed 's/cases.in.model/original.cases/g' summary_scripts/stat_block_output.R > summary_scripts/stat_block_output_original.R", wait = TRUE)
      
  # begin option (i) loop
    for (i in 1:length(option)){

  cat("\nPartition:\n")
  print(option.name[i])
  print(option[[i]])

    if ("GENCOMP" %in% option[[i]]) {
      week.begin <- 102
      week.end <- 563
      week <- c(15, 80)
    }
  
    if ("AGE" %in% option[[i]]){
    my.cutpoints <- list(ea.agediscrep = agediscrep.cuts, hid.occnum = hid.cuts)
    } else if ("GENCOMP" %in% option[[i]]){
    my.cutpoints <- list(ea.agediscrep = agediscrep.cuts, age.gg10.e = age.cuts, hid.occnum = hid.cuts, week.cutpoints = week.cuts, age.gg10.a = age.cuts, ageatdeath.a = age.cuts)
    } else {
      my.cutpoints <- list(ea.agediscrep = agediscrep.cuts, age.gg10.a = age.cuts, hid.occnum = hid.cuts)
    }
    
  cat("\n\nCutpoints:\n")
  print(my.cutpoints)

# Set drop vars
# -------------
    to.drop <- c("death.a","age.gg10.e","dfdgg10.a","ageatdeath.a","gg10.e","gp10.e","ss9.e","pg8.e","pp8.e","gg10.a","pg8.a","gg6.a","gp6.a","ss5.a","pg4.a","mover.e")
    
    if ("ZIP" %in% unlist(option)){ # pop.density and percapita.income do not vary within zip codes
      to.drop<-c(to.drop, "pop.density", "percapita.income")
    }
    if ("REGDATE" %in% unlist(option) & !("REGDATE" %in% option[[i]])){
      to.drop <- c(to.drop, "regdate.e", "regdate.a")
    }
    if ("AGE" %in% option[[i]]){
      to.drop <- c(to.drop, "age.gg10.a")
    }
    if ("GENCOMP" %in% option[[i]]){
      to.drop <- c(to.drop, "female.e", "female.a", "male.e", "male.a", "age.gg10.a", "ea.agediscrep")
      to.drop <- to.drop[which(!(to.drop %in% c("age.gg10.e")))]
    }
  
  # display matching terms
    cat("\nMatching on:\n")
    print(names(casecontrol)[!(names(casecontrol) %in% to.drop)])
    cat("\n")

# Output prep
# -----------
  # write.csv directories and file names
  output.gg10<-paste("output/", option.name[i], " gg10.csv", sep = "")
  output.gp10<-paste("output/", option.name[i], " gp10.csv", sep = "")
  output.ss9<-paste("output/", option.name[i], " ss9.csv", sep = "")
  output.original<-paste("output/", option.name[i], " original.csv", sep = "")
  
  source("summary_scripts/stat_blocks.R")
  
  write.table(rbind(original.sample.header), output.original, col.names = FALSE, row.names = FALSE, sep = ",")
  write.table(rbind(matched.sample.header), output.gg10, col.names = FALSE, row.names = FALSE, sep = ",")
       
  if (!("AGE" %in% option[[i]] | "GENCOMP" %in% option[[i]])){
  write.table(rbind(matched.sample.header), output.gp10, col.names = FALSE, row.names = FALSE, sep = ",")
  write.table(rbind(matched.sample.header), output.ss9, col.names = FALSE, row.names = FALSE, sep = ",")
  }
       
  my.casecontrol <- casecontrol

# Options
# =======

# Impute gender
# -------------
  if ("IMPUTE" %in% option[[i]]){
    if (my.casecontrol$male.e==1&my.casecontrol$male.a==0&my.casecontrol$female.a==0){
      my.casecontrol$female.a<-1
    }
    if (my.casecontrol$female.e==1&my.casecontrol$male.a==0&my.casecontrol$female.a==0){
      my.casecontrol$male.a<-1
    }
    if (my.casecontrol$male.a==1&my.casecontrol$male.e==0&my.casecontrol$female.e==0){
      my.casecontrol$female.e<-1
    }
    if (my.casecontrol$female.a==1&my.casecontrol$male.e==0&my.casecontrol$female.e==0){
      my.casecontrol$male.e<-1
    }
  }

# Opposite sex only
# -----------------
  if ("OPPSEX" %in% option[[i]]){
  my.casecontrol <- my.casecontrol[my.casecontrol$male.e==my.casecontrol$female.a&my.casecontrol$female.a==1|my.casecontrol$female.e==my.casecontrol$male.a&my.casecontrol$male.a==1,]
  }

# Expunge
# -------
  if ("FORCECULL" %in% option[[i]]){
    my.casecontrol<-my.casecontrol[my.casecontrol$pg4.e==1&my.casecontrol$pg4.a==1&my.casecontrol$pg8.e==1&my.casecontrol$pg8.a==1,]
  }

# Gender
# ------
  if ("MALE" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$male.e==1,]
  }
  if ("FEMALE" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$female.e==1,]
  }

# Age
# ---
  if ("UNDER65" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$age.gg10.e<=64,]
  }
  if ("65OROVER" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$age.gg10.e>=65,]
  }
  if ("65TO79" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$age.gg10.e>=65&my.casecontrol$age.gg10.e<=79,]
  }
  if ("80OROVER" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$age.gg10.e>=80,]
  }

# Voting
# ------
  if ("SAME" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$gg6.e==my.casecontrol$gg6.a&my.casecontrol$gp6.e==my.casecontrol$gp6.a&my.casecontrol$ss5.e==my.casecontrol$ss5.a&my.casecontrol$pg4.e==my.casecontrol$pg4.a,]
  }
  if ("DIFFERENT" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$gg6.e!=my.casecontrol$gg6.a|my.casecontrol$gp6.e!=my.casecontrol$gp6.a|my.casecontrol$ss5.e!=my.casecontrol$ss5.a|my.casecontrol$pg4.e!=my.casecontrol$pg4.a,]
  }
  if ("MORE" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[(my.casecontrol$gg6.e+my.casecontrol$gp6.e+my.casecontrol$ss5.e+my.casecontrol$pg4.e)>(my.casecontrol$gg6.a+my.casecontrol$gp6.a+my.casecontrol$ss5.a+my.casecontrol$pg4.a),]
  }
  if ("FEWER" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[(my.casecontrol$gg6.e+my.casecontrol$gp6.e+my.casecontrol$ss5.e+my.casecontrol$pg4.e)<(my.casecontrol$gg6.a+my.casecontrol$gp6.a+my.casecontrol$ss5.a+my.casecontrol$pg4.a),]
  }
  if ("NOABSTAIN" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$gg6.e==1&my.casecontrol$gp6.e==1&my.casecontrol$ss5.e==1&my.casecontrol$pg4.e==1,]
  }
  if ("ABSTAIN" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$gg6.e==0|my.casecontrol$gp6.e==0|my.casecontrol$ss5.e==0|my.casecontrol$pg4.e==0,]
  }
  if ("ONEABSTAIN" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[(my.casecontrol$gg6.e+my.casecontrol$gp6.e+my.casecontrol$ss5.e+my.casecontrol$pg4.e)==3,]
  }
  if ("TWOABSTAIN" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[(my.casecontrol$gg6.e+my.casecontrol$gp6.e+my.casecontrol$ss5.e+my.casecontrol$pg4.e)==2,]
  }
  if ("THREEABSTAIN" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[(my.casecontrol$gg6.e+my.casecontrol$gp6.e+my.casecontrol$ss5.e+my.casecontrol$pg4.e)==1,]
  }

# Occupancy
# ---------
  if ("OCC2" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$hid.occnum==2,]
  }
  if ("OCC3PLUS" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$hid.occnum>=3,]
  }

# Rural/Urban
# -----------
  if ("RURAL" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$pop.density<1000,]
  }
  if ("URBAN" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$pop.density>=1000,]
  }

# Rich/poor
# ---------
  if ("RICH" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$percapita.income>=35000,]
  }
  if ("POOR" %in% option[[i]]) {
    my.casecontrol<-my.casecontrol[my.casecontrol$percapita.income<35000,]
  }  

# Movers
# ------

  if ("MOVERS" %in% option[[1]] ){
    my.casecontrol <- my.casecontrol[my.casecontrol$mover.e==0,]
    }
  if ("NONMOVERS" %in% option[[1]] ){
    my.casecontrol <- my.casecontrol[my.casecontrol$mover.e==1,]
    }
  
### Registration

  if ("I" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$registration=="I",]
  }
  if ("M" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$registration=="M",]
  }
  if ("O" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$registration=="O",]
  }
  if ("D" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$registration=="D",]
  }
  if ("R" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$registration=="R",]
  }
  if ("U" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$registration=="U",]
  }  
  
### Birthplace

  if ("CA" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$birthplace=="CA",]
  }

  if ("NOTCA" %in% option[[i]]) {
   my.casecontrol<-my.casecontrol[my.casecontrol$birthplace!="CA",]
  }    

# Match
# =====

# TODO Start week (j) loop (must export only week or age)
# -------------------------------------------------------
  # note: week.begin & week.end do not overlap
  
    for (j in 1:length(week.begin)){
  
    casecontrol.select <- my.casecontrol[my.casecontrol$dfdgg10.a>=week.begin[j]&my.casecontrol$dfdgg10.a<=week.end[j]|is.na(my.casecontrol$dfdgg10.a),]
  
    if (j == 33) {cat("\nj loop (1-32): done\n")}
    if (j == 65) {cat("j loop (1-64): done\n")}
    if (j == 96) {cat("Now on j loop 96 (final)\n")}

# Original summary stats
# ----------------------
  original.cases <- casecontrol.select[casecontrol.select$death.a==1,]
  
  if ("FEMALECTRL" %in% option[[i]]) {
    original.cases <- casecontrol.select[casecontrol.select$male.e==1,]
  }
  source('summary_scripts/stat_block_output_original.R')
  write.table(rbind(original.cases.stats), file = output.original, append = TRUE, col.names = FALSE, row.names = FALSE, sep = ",")
  
  rm(original.cases)

# Match
# -----
  if ("GENCOMP" %in% option[[i]]) {
  mat <- cem(treatment = "male.e", data = casecontrol.select, drop = to.drop, cutpoints = my.cutpoints)
  
  } else {
  mat<-cem(treatment = "death.a", data = casecontrol.select, drop = to.drop, cutpoints = my.cutpoints)
  
  }
  
  if ("GENCOMP" %in% option[[i]]) {
    cases.in.model<-casecontrol.select[mat$matched==1&casecontrol.select$male.e==1,]
  
  } else {
  cases.in.model<-casecontrol.select[mat$matched==1&casecontrol.select$death.a==1,]
  }

# CEM & ATT
# ---------
      if ("GENCOMP" %in% option[[i]]) {
      est <- att(mat, gg10.e ~ male.e, data = casecontrol.select)
      source('summary_scripts/stat_block_output.R')
      source('summary_scripts/stat_cem_output.R')
      write.table(rbind(cases.cem.stats), file = output.gg10, append = TRUE, col.names = FALSE, row.names = FALSE, sep = ",")
  
    } else {
      est<-att(mat, gg10.e ~ death.a, data = casecontrol.select)
      source('summary_scripts/stat_block_output.R')
      source('summary_scripts/stat_cem_output.R')
      write.table(rbind(cases.cem.stats), file = output.gg10, append = TRUE, col.names = FALSE, row.names = FALSE, sep = ",")
    
      week <- week - 21; week.begin <- week.begin - 21*7; week.end <- week.end - 21*7
      est<-att(mat, gp10.e ~ death.a, data = casecontrol.select)
      source('summary_scripts/stat_block_output.R')
      source('summary_scripts/stat_cem_output.R')
      write.table(rbind(cases.cem.stats), file = output.gp10, append = TRUE, col.names = FALSE, row.names = FALSE, sep = ",")
      
      week <- week + 21 - 76; week.begin <- week.begin + 21*7 - 76*7; week.end <- week.end + 21*7 - 76*7
      est<-att(mat, ss9.e ~ death.a, data = casecontrol.select)
      source('summary_scripts/stat_block_output.R')
      source('summary_scripts/stat_cem_output.R')
      write.table(rbind(cases.cem.stats), file = output.ss9, append = TRUE, col.names = FALSE, row.names = FALSE, sep = ",")
  
      week <- week + 76; week.begin <- week.begin + 76*7; week.end <- week.end + 76*7
      }
  
      rm(mat)
      rm(est)
      rm(casecontrol.select)
      rm(cases.in.model)
    
    # end age/week (j) loop
    }  
    
    rm(my.casecontrol)
     
    # end option (i) loop
    }
    
  #    system("rm summary_scripts/stat_block_output_original.R", wait = FALSE)

